First of all load all packages needed for further computations.

library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(xtable)
library(doBy)
library(scales)
library(gridExtra)
library(plotly)

Load the ATM data set and rename its columns for convenience in further plotting.

# Read the ATM data; the first line of the file holds the column names.
# `header` is spelled out in full instead of relying on partial matching.
ATM <- read.table("http://www.trutschnig.net/Datensatz.txt", header = TRUE)
# Turn the ymd column into Date values.
ATM$ymd <- as.Date(ATM$ymd)
# Shorter column names that the plots below refer to.
names(ATM)[names(ATM) == "ymd"] <- "date"
names(ATM)[names(ATM) == "sum_out"] <- "cash"

Load and sample from the RTR Data due to computational restrictions.

# Open the remote .RData file and restore the objects stored in it
# (RTR2015 is the one used below).
address <- url("http://www.trutschnig.net/RTR2015.RData")
load(address)
# A url() connection is not released by load(); close it explicitly so it
# does not linger until garbage collection warns about it.
close(address)
# Keep a random subset of 2500 rows for the plots that follow.
RTR2015_sample <- RTR2015[sample(nrow(RTR2015), 2500), ]

ATM dataset

A simple ggplot example with the ATM data and alpha of 0.3.

# Cash over time, one semi-transparent line per weekday.
p <- ggplot(data = ATM, mapping = aes(x = date, y = cash)) +
  geom_line(mapping = aes(group = weekday), alpha = 0.3)
p


Now plotly comes into play. Its function subplot() merges ggplot2 objects and plotly objects into one plotly object. The ggplotly() function converts ggplot2 objects into plotly objects.

# Combine four panels in a 2 x 2 grid with shared axes: the ggplot object p,
# its plotly conversion, a 2D-bin version and a hexbin version of the data.
subplot(
  p,
  ggplotly(p),
  ggplot(ATM, aes(date, cash)) + geom_bin2d(),
  ggplot(ATM, aes(date, cash)) + geom_hex(),
  nrows = 2,
  shareX = TRUE,
  shareY = TRUE,
  # TRUE instead of T: T is an ordinary variable that can be reassigned.
  titleY = TRUE,
  titleX = TRUE
)



Let us use the plot_ly() function for cash withdrawals grouped by weekdays. The information shown when hovering is confined to the weekday.

# Group the ATM data by weekday and build a marker plot whose hover label
# consists only of the custom text (the weekday).
ATMgrouped <- ATM %>% group_by(weekday)
p <- plot_ly(
  data = ATMgrouped,
  x = ~date,
  y = ~cash,
  mode = "markers",
  hoverinfo = "text",
  text = ~paste("</br> weekday: ", weekday)
)
p



Exercise: Modify the plot_ly() call so that hovering over data points also shows the holiday information and the cash withdrawn (hint: use the columns ‘holiday’ and ‘cash’).

# Same marker plot, but the hover text now lists weekday, cash and holiday,
# one per line.
ATMgrouped <- ATM %>% group_by(weekday)
p <- plot_ly(
  data = ATMgrouped,
  x = ~date,
  y = ~cash,
  mode = "markers",
  hoverinfo = "text",
  text = ~paste(
    "</br> weekday: ", weekday,
    "</br> cash: ", cash,
    "</br> holiday: ", holiday
  )
)
p



Nested add_lines() functions.

# The inner call adds faint lines for the data already attached to p;
# the outer call adds a second set of lines restricted to Fridays.
add_lines(
  add_lines(p, name = "cash withdrawals", alpha = 0.2),
  data = filter(ATM, weekday == "Fri"),
  name = "friday"
)



Plotly objects can be piped as any other data, let us pipe the plotly object p from before to the add_lines() function.

# Feed the plotly object p into add_lines() through the pipe.
p %>% add_lines(alpha = 0.2, name = ~"cash")



A plotly object with the lines grouped by weekdays and no information for hovering and avoiding nested functions.

# Pipeline: group by weekday, map date/cash to the axes, then draw faint
# lines with hovering switched off.
allWeekdays <- ATM %>%
  group_by(weekday) %>%
  plot_ly(x = ~date, y = ~cash) %>%
  add_lines(name = "cash withdrawals", hoverinfo = "none", alpha = 0.2)
allWeekdays



Exercise: Highlight the series of cash withdrawals for Fridays within the last plot (hint: you can use the function filter() from the dplyr package). Please pipe your code to the function called ‘rangeslider()’. Do you think this function is useful when dealing with time series?

# Restrict the data attached to the plot to Fridays, draw those as an extra
# line trace named "Fri", and add a range slider.
allWeekdays %>%
  filter(weekday == "Fri") %>%
  add_lines(name = "Fri") %>%
  rangeslider()



The add_lines() function provides different line types and colors for the lines in a plot. Let us look at them using the subplot() function again.

# Two stacked panels sharing the x axis: weekdays told apart by colour (top)
# and by line type (bottom).
subplot(
  add_lines(p, color = ~weekday),
  add_lines(p, linetype = ~weekday),
  nrows = 2,
  shareX = TRUE
)



The add_fun() function.

# Each add_fun() callback receives the plot, filters its data to one weekday
# and adds a line trace for it; a range slider is attached at the end.
allWeekdays %>%
  add_fun(function(plt) {
    plt %>%
      filter(weekday == "Fri") %>%
      add_lines(name = "Fri")
  }) %>%
  add_fun(function(plt) {
    plt %>%
      filter(weekday == "Mon") %>%
      add_lines(name = "Mon")
  }) %>%
  rangeslider()



Exercise: Call the rangeslider() function with allWeekdays as its argument. Why are Mondays and Fridays no longer highlighted?

# Attach a range slider directly to the stored allWeekdays plot.
rangeslider(p = allWeekdays)

RTR dataset



Regression methods applied to download speed and upload speed over all providers using ggplot() and ggplotly().

# Upload vs. download speed: faint points, the default smoother (blue) and a
# linear fit without confidence band (black).
p <- ggplot(RTR2015_sample, aes(x = rtr_speed_ul, y = rtr_speed_dl)) +
  geom_point(alpha = 0.05, color = "magenta") +
  geom_smooth(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "black")

# hoverinfo is not an argument of ggplotly() and was silently ignored there;
# style() sets it on the traces of the converted plot instead.
ggplotly(p) %>%
  style(hoverinfo = "none")



The function plotly_data() returns data associated with a plotly visualization.

# Convert p and return the data of its second layer (layerData = 2) as
# computed by ggplot2 (originalData = FALSE). The former hoverinfo argument
# was dropped: ggplotly() does not use it and it has no effect on the data.
p %>%
  ggplotly(layerData = 2, originalData = FALSE) %>%
  plotly_data()
## # A tibble: 80 x 13
##        x      y    ymin   ymax    se PANEL group colour fill    size
##  * <dbl>  <dbl>   <dbl>  <dbl> <dbl> <int> <int> <chr>  <chr>  <dbl>
##  1    1    740.  -1387.  2868. 1086.     1    -1 blue   grey60     1
##  2  616.  8462.   6593. 10331.  954.     1    -1 blue   grey60     1
##  3 1231.  7767.   6067.  9467.  867.     1    -1 blue   grey60     1
##  4 1846.  8649.   7030. 10268.  826.     1    -1 blue   grey60     1
##  5 2460.  8714.   7358. 10070.  692.     1    -1 blue   grey60     1
##  6 3075. 11723.  10445. 13002.  652.     1    -1 blue   grey60     1
##  7 3690. 16483.  14838. 18128.  839.     1    -1 blue   grey60     1
##  8 4305. 20035.  18136. 21933.  969.     1    -1 blue   grey60     1
##  9 4920. 22791.  21036. 24547.  896.     1    -1 blue   grey60     1
## 10 5535. 25211.  23523. 26899.  861.     1    -1 blue   grey60     1
## # ... with 70 more rows, and 3 more variables: linetype <dbl>,
## #   weight <dbl>, alpha <dbl>



The function add_segments() and add_annotations().

# Mark the rows of the second layer's computed data with the largest and the
# smallest standard error: a vertical segment from ymin to ymax plus a label.
p %>%
  # FALSE instead of F: F is an ordinary variable that can be reassigned.
  ggplotly(layerData = 2, originalData = FALSE) %>%
  # The callback parameter is named plt so it does not shadow the outer p.
  add_fun(function(plt) {
    plt %>%
      slice(which.max(se)) %>%
      add_segments(x = ~x, xend = ~x, y = ~ymin, yend = ~ymax) %>%
      add_annotations("Maximum uncertainty", ax = 60)
  }) %>%
  add_fun(function(plt) {
    plt %>%
      slice(which.min(se)) %>%
      add_segments(x = ~x, xend = ~x, y = ~ymin, yend = ~ymax) %>%
      add_annotations("Minimum uncertainty")
  })



Illustration of a potential cure for overplotting.

# Three versions of the same scatter side by side: default markers, then
# markers with alpha 0.2 and alpha 0.02.
subplot(
  plot_ly(
    data = RTR2015_sample,
    x = ~rtr_speed_dl, y = ~rtr_speed_ul,
    name = "default"
  ),
  plot_ly(data = RTR2015_sample, x = ~rtr_speed_dl, y = ~rtr_speed_ul) %>%
    add_markers(name = "alpha = 0.2", alpha = 0.2),
  plot_ly(data = RTR2015_sample, x = ~rtr_speed_dl, y = ~rtr_speed_ul) %>%
    add_markers(name = "alpha = 0.02", alpha = 0.02)
)



A simple 3D plot with plot_ly()

# Download speed, upload speed and ping mapped to the x, y and z axes.
plot_ly(
  data = RTR2015_sample,
  x = ~rtr_speed_dl,
  y = ~rtr_speed_ul,
  z = ~rtr_ping
)



The add_markers() function for differentiating the data points with categorical information.

# Same three axes, drawn as semi-transparent markers coloured by op_name.
plot_ly(
  data = RTR2015_sample,
  x = ~rtr_speed_dl,
  y = ~rtr_speed_ul,
  z = ~rtr_ping
) %>%
  add_markers(color = ~op_name, alpha = 0.2)



The same plot from before in 2 dimensions.

# Two-dimensional variant: download vs. upload speed, coloured by op_name.
plot_ly(
  data = RTR2015_sample,
  x = ~rtr_speed_dl,
  y = ~rtr_speed_ul
) %>%
  add_markers(color = ~op_name, alpha = 0.2)



Dendrogram - going deeper into the inspection of download speed and upload speed.

# Scatter plot of download vs. upload speed with a marginal distribution of
# each variable, arranged in a 2 x 2 grid (top-right cell left empty).
x <- RTR2015_sample$rtr_speed_dl
y <- RTR2015_sample$rtr_speed_ul

s <- subplot(
  # The marginal traces only carry x (or y). The trace type is stated
  # explicitly instead of being left to plotly's type inference, which
  # emits a "No trace type specified" message.
  plot_ly(RTR2015_sample, x = x, color = I("green")) %>%
    add_trace(x = x, name = "download speed", type = "histogram"),
  plotly_empty(),
  plot_ly(RTR2015_sample, x = x, y = y, color = I("blue")) %>%
    add_markers(alpha = 0.2, color = ~op_name),
  plot_ly(y = y, color = I("blue")) %>%
    add_trace(y = y, name = "upload speed", type = "histogram"),
  nrows = 2, heights = c(0.2, 0.8), widths = c(0.8, 0.2),
  shareX = TRUE, shareY = TRUE, titleX = FALSE, titleY = FALSE
)
layout(s, showlegend = TRUE)



Exercise: Create a dendrogram for a sample of 2500 observations from the RTR2015 dataset, restricted to 4G. What is the difference in the interpretation, i.e. do the operators treat the relation of upload to download speed differently by technology? It is about the distribution!

# Keep the rows whose nw_cat is "4G", then draw 2500 of them at random.
RTR2015_4G <- RTR2015[which(RTR2015$nw_cat == "4G"), ]
RTR2015_4G_sample <- RTR2015_4G[sample(nrow(RTR2015_4G), 2500), ]



# Same layout for the 4G sample: scatter plot of download vs. upload speed
# with a marginal distribution of each variable (top-right cell left empty).
x <- RTR2015_4G_sample$rtr_speed_dl
y <- RTR2015_4G_sample$rtr_speed_ul

s <- subplot(
  # The marginal traces only carry x (or y). The trace type is stated
  # explicitly instead of being left to plotly's type inference, which
  # emits a "No trace type specified" message.
  plot_ly(RTR2015_4G_sample, x = x, color = I("green")) %>%
    add_trace(x = x, name = "download speed", type = "histogram"),
  plotly_empty(),
  plot_ly(RTR2015_4G_sample, x = x, y = y, color = I("blue")) %>%
    add_markers(alpha = 0.2, color = ~op_name),
  plot_ly(y = y, color = I("blue")) %>%
    add_trace(y = y, name = "upload speed", type = "histogram"),
  nrows = 2, heights = c(0.2, 0.8), widths = c(0.8, 0.2),
  shareX = TRUE, shareY = TRUE, titleX = FALSE, titleY = FALSE
)
layout(s, showlegend = TRUE)



Mosaic plot using the package ggmosaic.

library(ggmosaic)

# Mosaic of op_name by device_has_lte, filled by nw_cat, then converted to
# an interactive plotly object.
p <- ggplot(data = RTR2015_sample) +
  geom_mosaic(
    aes(x = product(op_name, device_has_lte), fill = factor(nw_cat)),
    divider = ddecker(),
    offset = 0.05
  ) +
  labs(title = "Mosaicplot", x = "operator and LTE", y = "proportion") +
  guides(fill = guide_legend(title = "technology"))
ggplotly(p)